2  Tumor: Normalization, doublet discrimination, integration, clustering

2.1 Set up Seurat workspace

# Load libraries
library(data.table)
library(devtools)
library(presto)
library(BiocParallel)
library(glmGamPoi)
library(sctransform)
library(Seurat)
library(tidyverse)
library(miQC)
library(SeuratWrappers)
library(flexmix)
library(SingleCellExperiment)
library(SummarizedExperiment)
library(readxl)
library(fishpond)
library(Matrix)
library(speckle)
library(scater)
library(patchwork)
library(vctrs)
library(alevinQC)
library(harmony)
library(scDblFinder)
library(cellXY)

# Set global options for Seurat v5 objects
options(Seurat.object.assay.version = 'v5')

2.2 Load previously saved merged object

merged.18279.tumor <- readRDS("Tumor_scRNA_Part1.rds")

2.3 Normalize and scale data

Regress out mitochondrial contribution

merged.18279.tumor <- NormalizeData(merged.18279.tumor)
merged.18279.tumor <- FindVariableFeatures(merged.18279.tumor, 
                                    assay = "RNA", 
                                    layer = "data", 
                                    selection.method = "vst", 
                                    nfeatures = 5000)
merged.18279.tumor <- ScaleData(merged.18279.tumor, vars.to.regress = "percent.mt")

2.4 Run PCA

merged.18279.tumor <- RunPCA(merged.18279.tumor, npcs = 200, verbose = FALSE)
ElbowPlot(merged.18279.tumor, ndims = 200, reduction = "pca")

2.5 Plot unintegrated UMAP

merged.18279.tumor <- RunUMAP(merged.18279.tumor, 
                        reduction = "pca", 
                        dims = 1:50, 
                        reduction.name = "umap.unintegrated")
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
11:47:33 UMAP embedding parameters a = 0.9922 b = 1.112
11:47:33 Read 31415 rows and found 50 numeric columns
11:47:33 Using Annoy for neighbor search, n_neighbors = 30
11:47:33 Building Annoy index with metric = cosine, n_trees = 50
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
11:47:37 Writing NN index file to temp file /tmp/RtmpkA48Fy/file287cc87b7f04fc
11:47:37 Searching Annoy index using 1 thread, search_k = 3000
11:47:48 Annoy recall = 100%
11:47:49 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
11:47:51 Initializing from normalized Laplacian + noise (using RSpectra)
11:47:53 Commencing optimization for 200 epochs, with 1386960 positive edges
11:48:07 Optimization finished
DimPlot(merged.18279.tumor, reduction = "umap.unintegrated", group.by = c("Site", "Patient", "Timepoint"))

2.6 Call preliminary clusters for the purposes of doublet discrimination

merged.18279.tumor <- FindNeighbors(merged.18279.tumor, dims = 1:50, reduction = "pca")
Computing nearest neighbor graph
Computing SNN
merged.18279.tumor <- FindClusters(merged.18279.tumor, 
                             resolution = 0.4, 
                             algorithm = 2)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 31415
Number of edges: 1246446

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9585
Number of communities: 29
Elapsed time: 4 seconds
DimPlot(merged.18279.tumor, reduction = "umap.unintegrated", label = T)

2.7 Identify and remove doublets

This uses raw counts as input

2.7.1 Combine RNA layers

merged.18279.tumor[['RNA']] <- JoinLayers(merged.18279.tumor[['RNA']])

2.7.2 Convert seurat to sce and check colData

merged.18279.tumor.sce <- as.SingleCellExperiment(merged.18279.tumor, assay = "RNA")
merged.18279.tumor.sce
class: SingleCellExperiment 
dim: 61217 31415 
metadata(0):
assays(2): counts logcounts
rownames(61217): 5-8S-rRNA 5S-rRNA ... ZZEF1 ZZZ3
rowData names(0):
colnames(31415): P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT
  P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA ...
  P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC
  P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT
colData names(15): orig.ident nCount_RNA ... seurat_clusters ident
reducedDimNames(2): PCA UMAP.UNINTEGRATED
mainExpName: RNA
altExpNames(0):
colData(merged.18279.tumor.sce)
DataFrame with 31415 rows and 15 columns
                                                orig.ident nCount_RNA
                                               <character>  <numeric>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT SeuratProject    5288.99
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA SeuratProject    3314.99
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA SeuratProject    4434.99
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG SeuratProject    5171.98
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC SeuratProject    4295.91
...                                                    ...        ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG    SeuratProject    526.988
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC    SeuratProject    532.000
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC    SeuratProject    556.000
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC    SeuratProject    541.000
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT    SeuratProject    500.000
                                             nFeature_RNA percent.mt
                                                <integer>  <numeric>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT         1842    2.76045
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA         1716    4.83561
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA         1806    4.64488
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG         2118    3.59762
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC         2067   10.98089
...                                                   ...        ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG             252   10.05716
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC             310    7.33083
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC             269    9.89209
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC             467    8.59519
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT             398    5.10000
                                             miQC.probability   miQC.keep
                                                    <numeric> <character>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT         0.166588        keep
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA         0.125028        keep
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA         0.131072        keep
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG         0.168588        keep
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC         0.493406        keep
...                                                       ...         ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG           0.3196529        keep
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC           0.1016414        keep
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC           0.2969933        keep
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC           0.1737173        keep
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT           0.0620316        keep
                                                 Patient        Site
                                             <character> <character>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT        P101       Tumor
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA        P101       Tumor
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA        P101       Tumor
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG        P101       Tumor
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC        P101       Tumor
...                                                  ...         ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG           P108       Tumor
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC           P108       Tumor
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC           P108       Tumor
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC           P108       Tumor
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT           P108       Tumor
                                               Timepoint   IpiCohort
                                             <character> <character>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT         W00    2.5mgIpi
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA         W00    2.5mgIpi
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA         W00    2.5mgIpi
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG         W00    2.5mgIpi
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC         W00    2.5mgIpi
...                                                  ...         ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG             PD      5mgIpi
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC             PD      5mgIpi
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC             PD      5mgIpi
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC             PD      5mgIpi
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT             PD      5mgIpi
                                                   Assay                 Sample
                                             <character>            <character>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT         RNA P101_Tumor_W00_2.5mg..
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA         RNA P101_Tumor_W00_2.5mg..
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA         RNA P101_Tumor_W00_2.5mg..
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG         RNA P101_Tumor_W00_2.5mg..
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC         RNA P101_Tumor_W00_2.5mg..
...                                                  ...                    ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG            RNA P108_Tumor_PD_5mgIpi..
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC            RNA P108_Tumor_PD_5mgIpi..
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC            RNA P108_Tumor_PD_5mgIpi..
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC            RNA P108_Tumor_PD_5mgIpi..
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT            RNA P108_Tumor_PD_5mgIpi..
                                             RNA_snn_res.0.4 seurat_clusters
                                                    <factor>        <factor>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT              6               6 
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA              22              22
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA              11              11
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG              6               6 
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC              8               8 
...                                                      ...             ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG                 12              12
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC                 12              12
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC                 12              12
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC                 12              12
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT                 6               6 
                                                ident
                                             <factor>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT       6 
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA       22
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA       11
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG       6 
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC       8 
...                                               ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG          12
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC          12
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC          12
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC          12
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT          6 

2.7.3 Run scDblFinder

Set the dbr.sd very high to better allow thresholds to be set based on misclassification rates per sample

merged.18279.tumor.sce <- scDblFinder(merged.18279.tumor.sce,
                    samples = "Sample",
                    dbr.sd = 1,
                    clusters = "seurat_clusters",
                    BPPARAM = MulticoreParam(4,RNGseed=123)
                )

2.7.4 Inspect results

# Look at the classes
table(merged.18279.tumor.sce$seurat_clusters, merged.18279.tumor.sce$scDblFinder.class)
    
     singlet doublet
  0     4218     323
  1     2529     212
  2     2419     113
  3     2398     101
  4     2218     181
  5     2234       6
  6     1873     287
  7     2036      10
  8     1284     155
  9     1232     157
  10    1095      60
  11     938     126
  12     907       2
  13     563     159
  14     454      44
  15     393      70
  16     383      60
  17     355      42
  18     278      26
  19     262      26
  20     200      68
  21     200      19
  22      51     125
  23       0     138
  24      81      22
  25      81      12
  26      33      51
  27      16      54
  28       5      30
table(merged.18279.tumor.sce$Sample, merged.18279.tumor.sce$scDblFinder.class)
                             
                              singlet doublet
  P101_Tumor_W00_2.5mgIpi_RNA    1028     100
  P101_Tumor_W12_2.5mgIpi_RNA    4807     316
  P101_Tumor_W20_2.5mgIpi_RNA    1298      99
  P103_Tumor_W00_2.5mgIpi_RNA    4635     426
  P103_Tumor_W12_2.5mgIpi_RNA    5058     465
  P103_Tumor_W20_2.5mgIpi_RNA    4085     468
  P104_Tumor_PD_2.5mgIpi_RNA     3572     360
  P108_Tumor_PD_5mgIpi_RNA       4253     445
# Look at the scores
summary(merged.18279.tumor.sce$scDblFinder.score)
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
0.0000039 0.0054557 0.0275547 0.1358086 0.1161005 0.9999959 

2.7.5 Save doublet classifications into main Seurat object

merged.18279.tumor$doublet_classification <- merged.18279.tumor.sce$scDblFinder.class

2.7.6 Count singlets and doublets

table(merged.18279.tumor$doublet_classification)

singlet doublet 
  28736    2679 
table(merged.18279.tumor$doublet_classification, merged.18279.tumor$seurat_clusters)
         
             0    1    2    3    4    5    6    7    8    9   10   11   12   13
  singlet 4218 2529 2419 2398 2218 2234 1873 2036 1284 1232 1095  938  907  563
  doublet  323  212  113  101  181    6  287   10  155  157   60  126    2  159
         
            14   15   16   17   18   19   20   21   22   23   24   25   26   27
  singlet  454  393  383  355  278  262  200  200   51    0   81   81   33   16
  doublet   44   70   60   42   26   26   68   19  125  138   22   12   51   54
         
            28
  singlet    5
  doublet   30

2.7.7 Plot singlets/doublets in UMAP space

DimPlot(merged.18279.tumor,reduction = "umap.unintegrated", group.by = "doublet_classification")

2.7.8 Subset object to remove doublets and count remaining cells

merged.18279.tumor.singlets <- subset(merged.18279.tumor, doublet_classification %in% c("singlet"))
dim(merged.18279.tumor.singlets)
[1] 61217 28736
# Count remaining cells per initial cluster
table(merged.18279.tumor.singlets$seurat_clusters)

   0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
4218 2529 2419 2398 2218 2234 1873 2036 1284 1232 1095  938  907  563  454  393 
  16   17   18   19   20   21   22   23   24   25   26   27   28 
 383  355  278  262  200  200   51    0   81   81   33   16    5 

2.8 Remove cells with very high nCount_RNA values, set other final QC filters

merged.18279.tumor.singlets <- subset(merged.18279.tumor.singlets,
                                subset = nCount_RNA < 40000 & nCount_RNA > 1500 & nFeature_RNA > 750)
dim(merged.18279.tumor.singlets)
[1] 61217 19976

2.9 Re-compute PCA

Re-scale data now that it has been subset

merged.18279.tumor.singlets[["RNA"]] <- split(merged.18279.tumor.singlets[["RNA"]], f = merged.18279.tumor.singlets$Sample)

merged.18279.tumor.singlets <- FindVariableFeatures(merged.18279.tumor.singlets, 
                                    assay = "RNA", 
                                    layer = "data", 
                                    selection.method = "vst", 
                                    nfeatures = 5000)
merged.18279.tumor.singlets <- ScaleData(merged.18279.tumor.singlets, vars.to.regress = "percent.mt")
merged.18279.tumor.singlets <- RunPCA(merged.18279.tumor.singlets, npcs = 200, verbose = FALSE)

2.10 Run Harmony integration

merged.18279.tumor.singlets <- IntegrateLayers(merged.18279.tumor.singlets, 
                                method = HarmonyIntegration, 
                                orig.reduction = "pca",
                                new.reduction = "integrated.harmony"
)
Warning: HarmonyMatrix is deprecated and will be removed in the future from the
API in the future
Warning: Warning: The parameters do_pca and npcs are deprecated. They will be ignored for this function call and please remove parameters do_pca and npcs and pass to harmony cell_embeddings directly.
This warning is displayed once per session.
Warning: Warning: The parameter tau is deprecated. It will be ignored for this function call and please remove parameter tau in future function calls. Advanced users can set value of parameter tau by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter block.size is deprecated. It will be ignored for this function call and please remove parameter block.size in future function calls. Advanced users can set value of parameter block.size by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter max.iter.harmony is replaced with parameter max_iter. It will be ignored for this function call and please use parameter max_iter in future function calls.
This warning is displayed once per session.
Warning: Warning: The parameter max.iter.cluster is deprecated. It will be ignored for this function call and please remove parameter max.iter.cluster in future function calls. Advanced users can set value of parameter max.iter.cluster by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter epsilon.cluster is deprecated. It will be ignored for this function call and please remove parameter epsilon.cluster in future function calls. Advanced users can set value of parameter epsilon.cluster by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter epsilon.harmony is deprecated. It will be ignored for this function call and please remove parameter epsilon.harmony in future function calls. If users want to control if harmony would stop early or not, use parameter early_stop. Advanced users can set value of parameter epsilon.harmony by using parameter .options and function harmony_options().
This warning is displayed once per session.
Transposing data matrix
Using automatic lambda estimation
Initializing state using k-means centroids initialization
Harmony 1/10
Harmony 2/10
Harmony 3/10
Harmony 4/10
Harmony 5/10
Harmony converged after 5 iterations

2.11 Identify clusters after integration across a range of clustering resolutions

merged.18279.tumor.singlets <- FindNeighbors(merged.18279.tumor.singlets, dims = 1:50, reduction = "integrated.harmony")
Computing nearest neighbor graph
Computing SNN
merged.18279.tumor.singlets <- FindClusters(merged.18279.tumor.singlets, 
                             resolution = seq(0.1, 2, by=0.1), 
                             algorithm = 2)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9763
Number of communities: 12
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9563
Number of communities: 16
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9434
Number of communities: 18
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9327
Number of communities: 20
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9228
Number of communities: 25
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9136
Number of communities: 26
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9045
Number of communities: 28
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8970
Number of communities: 29
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8896
Number of communities: 29
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8826
Number of communities: 30
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8761
Number of communities: 32
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8702
Number of communities: 33
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8650
Number of communities: 34
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8597
Number of communities: 34
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8548
Number of communities: 35
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8499
Number of communities: 36
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8451
Number of communities: 37
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8407
Number of communities: 41
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8369
Number of communities: 43
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 19976
Number of edges: 792019

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8329
Number of communities: 42
Elapsed time: 2 seconds
merged.18279.tumor.singlets <- RunUMAP(merged.18279.tumor.singlets,
                        reduction = "integrated.harmony",
                        dims = 1:50,
                        reduction.name = "umap.harmony")
12:04:19 UMAP embedding parameters a = 0.9922 b = 1.112
12:04:19 Read 19976 rows and found 50 numeric columns
12:04:19 Using Annoy for neighbor search, n_neighbors = 30
12:04:19 Building Annoy index with metric = cosine, n_trees = 50
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:04:21 Writing NN index file to temp file /tmp/RtmpkA48Fy/file287cc85a21cae4
12:04:21 Searching Annoy index using 1 thread, search_k = 3000
12:04:28 Annoy recall = 100%
12:04:29 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
12:04:31 Initializing from normalized Laplacian + noise (using RSpectra)
12:04:33 Commencing optimization for 200 epochs, with 883792 positive edges
12:04:43 Optimization finished
table(merged.18279.tumor.singlets$seurat_clusters)

   0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
1482 1321 1147 1135 1132 1106  968  945  849  848  783  709  670  538  537  485 
  16   17   18   19   20   21   22   23   24   25   26   27   28   29   30   31 
 467  444  428  406  399  371  336  306  288  274  229  228  151  147  121  114 
  32   33   34   35   36   37   38   39   40   41 
 104   85   82   77   75   63   36   33   30   27 

2.12 Plot clusters

# Plot one as example
DimPlot(merged.18279.tumor.singlets,reduction = "umap.harmony", label = TRUE, group.by = "RNA_snn_res.1")

2.13 Plot metadata in UMAP space

DimPlot(merged.18279.tumor.singlets,reduction = "umap.harmony", group.by = "Patient")

DimPlot(merged.18279.tumor.singlets,reduction = "umap.harmony", group.by = "Site")

DimPlot(merged.18279.tumor.singlets,reduction = "umap.harmony", group.by = "Timepoint")

DimPlot(merged.18279.tumor.singlets,reduction = "umap.harmony", group.by = "IpiCohort")

DimPlot(merged.18279.tumor.singlets,reduction = "umap.harmony", group.by = "Sample") + NoLegend()

FeaturePlot(merged.18279.tumor.singlets,reduction = "umap.harmony",features="nCount_RNA",order=T)

FeaturePlot(merged.18279.tumor.singlets,reduction = "umap.harmony",features="nFeature_RNA",order=T)

FeaturePlot(merged.18279.tumor.singlets,reduction = "umap.harmony",features="percent.mt",order=T)

2.14 Save clustered object

saveRDS(merged.18279.tumor.singlets,"Tumor_scRNA_Part2.rds")

2.15 Get session info

sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] cellXY_0.99.0               scDblFinder_1.14.0         
 [3] harmony_1.2.0               alevinQC_1.16.1            
 [5] vctrs_0.6.5                 patchwork_1.3.0            
 [7] scater_1.30.1               scuttle_1.12.0             
 [9] speckle_1.0.0               Matrix_1.6-4               
[11] fishpond_2.6.2              readxl_1.4.3               
[13] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0
[15] Biobase_2.62.0              GenomicRanges_1.54.1       
[17] GenomeInfoDb_1.38.8         IRanges_2.36.0             
[19] S4Vectors_0.40.2            BiocGenerics_0.48.1        
[21] MatrixGenerics_1.14.0       matrixStats_1.4.1          
[23] flexmix_2.3-19              lattice_0.22-6             
[25] SeuratWrappers_0.3.19       miQC_1.8.0                 
[27] lubridate_1.9.3             forcats_1.0.0              
[29] stringr_1.5.1               dplyr_1.1.4                
[31] purrr_1.0.2                 readr_2.1.5                
[33] tidyr_1.3.1                 tibble_3.2.1               
[35] ggplot2_3.5.1               tidyverse_2.0.0            
[37] Seurat_5.1.0                SeuratObject_5.0.2         
[39] sp_2.1-4                    sctransform_0.4.1          
[41] glmGamPoi_1.12.2            BiocParallel_1.36.0        
[43] presto_1.0.0                Rcpp_1.0.13-1              
[45] devtools_2.4.5              usethis_3.0.0              
[47] data.table_1.16.2          

loaded via a namespace (and not attached):
  [1] fs_1.6.5                  spatstat.sparse_3.1-0    
  [3] bitops_1.0-9              httr_1.4.7               
  [5] RColorBrewer_1.1-3        profvis_0.4.0            
  [7] tools_4.3.2               utf8_1.2.4               
  [9] R6_2.5.1                  DT_0.33                  
 [11] lazyeval_0.2.2            uwot_0.2.2               
 [13] urlchecker_1.0.1          withr_3.0.2              
 [15] GGally_2.2.1              gridExtra_2.3            
 [17] progressr_0.15.1          cli_3.6.3                
 [19] spatstat.explore_3.2-6    fastDummies_1.7.3        
 [21] labeling_0.4.3            spatstat.data_3.1-4      
 [23] ggridges_0.5.6            pbapply_1.7-2            
 [25] Rsamtools_2.18.0          R.utils_2.12.3           
 [27] parallelly_1.39.0         sessioninfo_1.2.2        
 [29] limma_3.58.1              RSQLite_2.3.8            
 [31] BiocIO_1.12.0             generics_0.1.3           
 [33] gtools_3.9.5              ica_1.0-3                
 [35] spatstat.random_3.2-2     ggbeeswarm_0.7.2         
 [37] fansi_1.0.6               abind_1.4-8              
 [39] R.methodsS3_1.8.2         lifecycle_1.0.4          
 [41] yaml_2.3.10               edgeR_4.0.16             
 [43] SparseArray_1.2.2         Rtsne_0.17               
 [45] blob_1.2.4                grid_4.3.2               
 [47] dqrng_0.4.1               promises_1.3.0           
 [49] crayon_1.5.3              shinydashboard_0.7.2     
 [51] miniUI_0.1.1.1            beachmat_2.18.1          
 [53] cowplot_1.1.3             KEGGREST_1.42.0          
 [55] metapod_1.10.1            pillar_1.9.0             
 [57] knitr_1.45                rjson_0.2.23             
 [59] xgboost_1.7.8.1           future.apply_1.11.3      
 [61] codetools_0.2-20          leiden_0.4.3.1           
 [63] glue_1.8.0                remotes_2.5.0            
 [65] png_0.1-8                 spam_2.11-0              
 [67] org.Mm.eg.db_3.18.0       cellranger_1.1.0         
 [69] gtable_0.3.6              cachem_1.1.0             
 [71] xfun_0.49                 S4Arrays_1.2.0           
 [73] mime_0.12                 survival_3.7-0           
 [75] bluster_1.12.0            statmod_1.5.0            
 [77] ellipsis_0.3.2            fitdistrplus_1.2-1       
 [79] ROCR_1.0-11               nlme_3.1-166             
 [81] bit64_4.5.2               RcppAnnoy_0.0.22         
 [83] irlba_2.3.5.1             vipor_0.4.7              
 [85] KernSmooth_2.23-24        DBI_1.2.3                
 [87] colorspace_2.1-1          nnet_7.3-19              
 [89] tidyselect_1.2.1          bit_4.5.0                
 [91] compiler_4.3.2            BiocNeighbors_1.20.2     
 [93] DelayedArray_0.28.0       plotly_4.10.4            
 [95] rtracklayer_1.62.0        scales_1.3.0             
 [97] lmtest_0.9-40             digest_0.6.37            
 [99] goftest_1.2-3             spatstat.utils_3.1-1     
[101] rmarkdown_2.29            RhpcBLASctl_0.23-42      
[103] XVector_0.42.0            htmltools_0.5.8.1        
[105] pkgconfig_2.0.3           sparseMatrixStats_1.14.0 
[107] fastmap_1.2.0             rlang_1.1.4              
[109] htmlwidgets_1.6.4         shiny_1.9.1              
[111] DelayedMatrixStats_1.24.0 farver_2.1.2             
[113] zoo_1.8-12                jsonlite_1.8.9           
[115] R.oo_1.27.0               BiocSingular_1.18.0      
[117] RCurl_1.98-1.16           magrittr_2.0.3           
[119] modeltools_0.2-23         GenomeInfoDbData_1.2.11  
[121] dotCall64_1.2             munsell_0.5.1            
[123] viridis_0.6.5             reticulate_1.35.0        
[125] stringi_1.8.4             zlibbioc_1.48.2          
[127] MASS_7.3-60.0.1           org.Hs.eg.db_3.18.0      
[129] plyr_1.8.9                pkgbuild_1.4.5           
[131] ggstats_0.7.0             parallel_4.3.2           
[133] listenv_0.9.1             ggrepel_0.9.6            
[135] deldir_2.0-4              Biostrings_2.70.3        
[137] splines_4.3.2             tensor_1.5               
[139] hms_1.1.3                 locfit_1.5-9.10          
[141] igraph_2.1.1              spatstat.geom_3.2-8      
[143] RcppHNSW_0.6.0            reshape2_1.4.4           
[145] ScaledMatrix_1.10.0       pkgload_1.4.0            
[147] XML_3.99-0.17             evaluate_1.0.1           
[149] scran_1.30.2              BiocManager_1.30.25      
[151] tzdb_0.4.0                httpuv_1.6.15            
[153] RANN_2.6.2                polyclip_1.10-7          
[155] future_1.34.0             scattermore_1.2          
[157] rsvd_1.0.5                xtable_1.8-4             
[159] restfulr_0.0.15           svMisc_1.2.3             
[161] RSpectra_0.16-2           later_1.3.2              
[163] viridisLite_0.4.2         AnnotationDbi_1.64.1     
[165] GenomicAlignments_1.38.2  memoise_2.0.1            
[167] beeswarm_0.4.0            tximport_1.28.0          
[169] cluster_2.1.6             timechange_0.3.0         
[171] globals_0.16.3